/***************************************************************
                       check.c
Called from editproj.c, which is the menu of the Project window
***************************************************************/
#include "clam.h"
#include <float.h>

#define real_olap(l1,r1,l2,r2) ((r1>=l2 && r1<=r2) || (r2>=l1 && r2<=r1))

#define NUM_CHR 200 
#define NUM_ANCHORS 10000 

extern FILE *graphQueryOpen();  
extern int Zbcm();

/******************************************************
               DEF: clone_marker_ctg
If marker name without sq is subset of clone name,
does the marker have the most hits in the contig that contains
the clones. The is for BSS "sq" clones and FSD "sd1" clones.
******************************************************/
void clone_marker_ctg()
{
int i, j,  index;
struct marker *marker;
struct markerclone *mclp;
char  tmp[20];
int  cnt=0, mainctg, mainpos;
int hits=0, cntctg[5000], good=0, bad=0, k, big;

  for (j=i=0; i<arrayMax(markerdata); i++) {
      mainctg= -1; mainpos=0;
      marker = arrp(markerdata, i, MARKER);
      if (strstr(marker->marker,"sq")==NULL) continue;
      hits=cnt=0;
      for (k=0; k<5000; k++) cntctg[k] = 0;
      index = marker->cloneindex;
      mainctg= -1;
      for (k=0;k<20;k++) tmp[k]='\0';
      strncpy(tmp,marker->marker, strlen(marker->marker)-2);

      mclp = marker->nextclone;
      do{
           if (strstr(arrp(acedata, index, CLONE)->clone,tmp) != NULL)
               mainctg=arrp(acedata, index, CLONE)->ctg;
           cntctg[arrp(acedata, index, CLONE)->ctg]++;
           hits++;
           if (mclp==NULL) break;
           index = mclp->cloneindex;
           mclp = mclp->nextclone;
      } while (1);

      if (mainctg == -1) {
          strcat(tmp,"sd1");
          tmp[strlen(tmp)]='\0';
          if(fppFind(acedata,tmp,&index, cloneOrder)) 
             mainctg=arrp(acedata, index, CLONE)->ctg;
      }

      if (mainctg == -1) {
         bad++;
         for (k=0,big=mainctg; k<5000; k++) 
           if (cntctg[k]>0) cnt++;
         printf("XXX %15s --- (--) --- (--) Contigs %2d Hits %2d\n",
                     marker->marker, cnt,hits);
         continue;
      }
      for (k=0,big=mainctg; k<5000; k++) {
           if (cntctg[k]>0) cnt++;
           if (cntctg[k]>cntctg[big]) big=k;
      }
      if (big!=mainctg) {bad++; strcpy(tmp,"***");}
      else {good++; strcpy(tmp,"   ");}
      printf("%s %15s ctg%3d (%2d) ctg%3d (%2d) Contigs %2d Hits %2d\n",
                     tmp, marker->marker,mainctg,
                     cntctg[mainctg], big, cntctg[big],cnt,hits);
  }
  printf("Good %d  Bad %d\n",good, bad);
}
/******************************************************
               DEF: flip_contig
*******************************************************/
void flip_contig(int c)
{
int  i, k, rt, lt;
CLONE *clp;
int y;
void move_selected();

  lt=99999; rt=-99999;
  for (k= contigs[c].start; k != -1; k = clp->next) {
       clp = arrp(acedata,k,CLONE);
       rt = MaX(rt, clp->y);
       lt = MiN(lt, clp->x);
  }
  for (k= contigs[c].start; k != -1; k = clp->next) {
       clp = arrp(acedata,k,CLONE);
       clp->selected = TRUE;
       i = clp->x;
       y = clp->y;
       clp->x = (- clp->y + rt) + lt;
       clp->y = (- i + rt) + lt;
  }
PRTMESS=0;
  move_selected(c,c,0);
PRTMESS=1;
}
/**************************************************************
                      dosize
Size on Project Menu.
Humsize.fpc - 
for paper, run on sizes for all but vector counts,
Table 1:
  Calculate lengths of clones and distribution of number of bands
Table 2 2nd half:
  Calculate size of bands
**************************************************************/
void Zdosize()
{
int v, i, j, k,kk, b1, b2;
CLONE *cp;
int len, all_bands=0;
int cl=10,  cs;
int less_len[20], cnt_clones_with_len[20], cnt_bands_with_len[20];
int less_sizes[70], cnt_sizes[70];
int last, cnt_clones=0;
float tot_avgS=0.0, tot_avgL=0.0, f, avg_bands_with_len[20], std[20];

    if (!Proj.variable) {
       printf("This is only relevant for FPC files made with sizes.\n");
       printf("Variable tolerance on the Configuration window must be on.\n");
       return;
   }
   if (!arrayExists(bands))
        if (fpRead() == -1) return;

   printf("Start size statistics\n");
   for (j=0, i=100000; i<230000; i+=10000, j++) {
       less_len[j]= i;
       cnt_clones_with_len[j]=0;
   }
   less_len[j]= less_len[j-1];
   cnt_clones_with_len[j]=0;
   cl = j;

   for (j=0, i=500; i<28000; i+=1000, j++) {
       less_sizes[j]= i;
       cnt_sizes[j]=0;
   }
   less_sizes[j]= less_sizes[j-1];
   cnt_sizes[j]=0;
   cs = j;

   for (i=0; i<15; i++) cnt_bands_with_len[i]=0;

   for (i=0; i< arrayMax(acedata); i++) {
      cp = arrp(acedata, i, CLONE);
      if (cp->fp==NULL || cp->fp->b2==0) continue;
      if (cp->clone[0]=='!') continue;

      b1 = cp->fp->b1;
      cnt_clones++;

      b2 = last = len = 0;
      for (j=0; j< cp->fp->b2; j++) {
          v = arr(bands, b1+j-1, int);
          len+=v;
          b2++;
                     /* bin band values */
          for (k=0; k<cs; k++) {
               if (v < less_sizes[k]) {
                  cnt_sizes[k]++;
                  break;
               }
          }
          if (k==cs && v >= less_sizes[k]) cnt_sizes[k]++; 
      }
      tot_avgL += (float) len;
      if (b2!=0) tot_avgS += (float) (len/b2);
      all_bands += b2;
                  /* bin clone sizes */
      for (kk=-1, k=0; k<cl; k++) 
        if (len < less_len[k]) {
           kk = k;
           break;
        }
      if (kk == -1) kk=k;
      cnt_clones_with_len[kk]++;
      cnt_bands_with_len[kk]+=b2;
   }

/* std */
   
   for (i=0; i<=cl; i++) {
      avg_bands_with_len[i] = 
           (float)cnt_bands_with_len[i]/(float)cnt_clones_with_len[i];
      std[i] = 0.0;
   }
   for (i=0; i< arrayMax(acedata); i++) {
      cp = arrp(acedata, i, CLONE);
      if (cp->fp==NULL || cp->fp->b2==0) continue;
      if (cp->clone[0]=='!') continue;

      b1 = cp->fp->b1;
      for (b2=len=j=0; j< cp->fp->b2; j++) {
          v = arr(bands, b1+j-1, int);
          len+=v;
          b2++;
      }
      for (k=0; k<cl; k++) 
        if (len < less_len[k]) {
           std[k] += abs(avg_bands_with_len[k] - b2);
           break;
        }
      if (len >= less_len[k]) std[k] += abs(avg_bands_with_len[k] - b2);
   }
   for (i=0; i<=cl; i++) 
      if (cnt_clones_with_len[i]!=0)
          std[i] = std[i]/(float)cnt_clones_with_len[i];

/* clone lenghts */
   f = (float)cnt_clones;
   if (cnt_clones==0) return;
   printf("\nLength  Percent  Avg#bands +/-Std\n");
   for (i=0; i<cl; i++)
     printf("< %d %5.2f %5.2f +/- %5.3f\n", less_len[i],
        100.0 *((float)cnt_clones_with_len[i]/f), avg_bands_with_len[i], std[i]);

   printf("> %d %5.2f %5.2f +/- %5.3f\n", less_len[i],
        100.0 *((float)cnt_clones_with_len[i]/f), avg_bands_with_len[i], std[i]);
   printf("\nAverage length %d\n\n", (int) (tot_avgL/f));

/* band sizes */
    for (i=0; i<cs; i++)
       printf("< %d %5.2f\n", less_sizes[i], 
          100.0 *((float)cnt_sizes[i]/(float) all_bands));
    
    printf("> %d %5.2f\n\n", less_sizes[i], 
          100.0 *((float)cnt_sizes[i]/(float) all_bands));
    printf("Clones %d\n",cnt_clones);
}

/**************************************************************
                     doband
Size on Project Menu.
Humanmap.fpc - 
For paper  run on bands for vector counts only.
Table 2: Distriubion of rates
Percent vector
**************************************************************/
void Zdoband()
{
int v, i, j, k, b1, b2;
CLONE *cp;
int len, all_bands=0;
int cl=10, cr;
int less_rates[50], cnt_rates[50];
int last, cnt_clones=0;
float f;

    if (Proj.variable) {
       printf("This is only relevant for FPC files made with rates.\n");
       printf("Variable tolerance on the Configuration window must be off.\n");
       return;
   }
   if (!arrayExists(bands))
        if (fpRead() == -1) return;

   cl = j;
   for (j=0, i=200; i<4400; i+=200, j++) {
       less_rates[j]= i;
       cnt_rates[j]=0;
   }
   less_rates[j]= less_rates[j-1];
   cnt_rates[j] = 0;
   cr = j;

   for (i=0; i< arrayMax(acedata); i++) {
      cp = arrp(acedata, i, CLONE);
      if (cp->fp==NULL || cp->fp->b2==0) continue;
      if (cp->clone[0]=='!') continue;
      /*if (cp->fp->gelname[0]!='m' && cp->fp->gelname[0]!='N') continue;*/

      b1 = cp->fp->b1;
      b2 = cp->fp->b2;
      cnt_clones++;

      last = len = 0;
      for (j=0; j< b2; j++) {
          v = arr(bands, b1+j-1, int);
          all_bands++;
          for (k=0; k<cr; k++) 
               if (v < less_rates[k]) {
                  cnt_rates[k]++;
                  break;
               }
         if (k==cr && v >= less_rates[k]) cnt_rates[k]++; 
      }
   }

   f = (float)cnt_clones;
  
     /* band lenghts */
    for (i=0; i<cr; i++)
       printf("< %d %6d %5.2f\n", less_rates[i],cnt_rates[i],
             100.0 *((float)cnt_rates[i]/(float) all_bands));

    printf("> %d %6d %5.2f\n\n", less_rates[i],cnt_rates[i],
             100.0 *((float)cnt_rates[i]/(float) all_bands));
    printf("Clones %d\n",cnt_clones);
}

/***********************************************************/
/***********************************************************/
static long int Xmatch, Xcnt_all;
static int Xcnt_fp, Xtol[40];

static int check_two_fp(struct fpdata *fp1, struct fpdata *fp2)
{
int i, j, b;
int lstart, m, k, kbnd, l, idiff;

#ifdef PARALLEL
     copyLocalCz(&C1z);
     copyLocalCz(&C2z);
#endif

     for (j=i=0; i < fp1->b2; i++) {
       b = arr(bands, fp1->b1 + i - 1, int);
           /* get rid of dup if using humanmap and ours **/
       if (b > Pz.minVal && b < Pz.maxVal)
#ifdef PARALLEL
               C1z.localCoords[j++] = b;
#else
               C1z.coords[j++] = b;
#endif
     }
     C1z.nbands = j;

     for (j=i=0; i < fp2->b2; i++) {
       b = arr(bands, fp2->b1 + i - 1, int);
       if (b > Pz.minVal && b < Pz.maxVal)
#ifdef PARALLEL
               C2z.localCoords[j++] = b;
#else
               C2z.coords[j++] = b;
#endif
     }
     C2z.nbands = j;

     Zsulston(&C1z,&C2z,&Sz);
     if (Sz.prob > Pz.cutFlt) return 0;

     lstart = 0;
     for (m=k = 0; k < C1z.nbands; ++k)
     {
#ifdef PARALLEL
         kbnd = C1z.localCoords[k];
#else
         kbnd = C1z.coords[k];
#endif
#ifdef PARALLEL
         if (C1z.localCoords[k]==-1) continue;
#else
         if (C1z.coords[k]==-1) continue;
#endif
         for (l = lstart; l < C2z.nbands; ++l)
         {
#ifdef PARALLEL
             if (C1z.localCoords[l]==-1) continue;
#else
             if (C1z.coords[l]==-1) continue;
#endif
#ifdef PARALLEL
             idiff = kbnd - C2z.localCoords[l];
#else
             idiff = kbnd - C2z.coords[l];
#endif
             if (Ztol(kbnd, idiff))
             {
                Xtol[abs(idiff)]++;
                m++;
                Xmatch++;
                lstart = l + 1;
                break;
             }
             else if (idiff < 0)
               {
                 lstart = l;
                 break;
               }
          }
     }
     if (m > 0)
        Xcnt_fp += abs(m - C1z.nbands) + abs(m - C2z.nbands);
     Xcnt_all+=C1z.nbands;
     Xcnt_all+=C2z.nbands;
     return 1;
}
/*************************************************
             DEF: check_tol
Table 3: check duplicate gels
*************************************************/
static void do_check_tol(int flag)
{
CLONE *clp;
int  c, i;
long int cnt_fp=0;
int  bad=0, good=0;

  if (!arrayExists(bands)) 
      if (fpRead() == -1) return;

  for (i=0; i<40; i++) Xtol[i]=0;
  Xmatch=Xcnt_all=Xcnt_fp=0;

  for (c=0; c<arrayMax(acedata); c++) 
  {
     clp = arrp(acedata, c, CLONE);
     if (clp->clone[0]=='!') continue;
     if(clp->fp == NULL || clp->fp->b2==0) continue;
     if(clp->fp->next == NULL || clp->fp->next->b2==0) continue;
     if (check_two_fp(clp->fp, clp->fp->next)) {
         good++;
         if (flag==0) printf("%4d. %14s %12s %12s %2db %2db %2d %1.0e %ld\n",
              good, clp->clone, 
             clp->fp->gelname, clp->fp->next->gelname, 
              C1z.nbands, C2z.nbands, Sz.match, Sz.prob,  cnt_fp);
     }
     else {
         bad++;
         if (flag==1) printf("%4d. %14s %12s %12s %2db %2db %2d %1.0e %ld\n",
             bad, clp->clone, 
             clp->fp->gelname, clp->fp->next->gelname, 
             C1z.nbands, C2z.nbands, Sz.match, Sz.prob,  cnt_fp);
     }
  }
  for (i=0; i<=Pz.tol; i++) 
      printf("%2d %6d %3.3f\n",i,Xtol[i],100.0*((float)Xtol[i]/(float)Xmatch));
  
  printf("%s Bad %d Good %d Fp %5.2f (%d,%1.0e, min %d, max %d)\n",
        Proj.mapname, bad,good, 100.0*((float) Xcnt_fp/(float)Xcnt_all),
              Pz.tol, Pz.cutFlt, Pz.minVal, Pz.maxVal);
}
void Zcheck_tol() {
   do_check_tol(0);
}
void Zcheck_tol2() {
   do_check_tol(1);
}

/****************************************************************
		file_check_fp
		Friedrich Engler

Reads in a file containing pairs of clone names.  Runs check_two_fp
on each pair's fingerprint.  A message is printed to stdout for each
mismatch.
*****************************************************************/
void file_check_fp(){
   FILE *readfile;
   char clone_name1[38],clone_name2[38];
   int index1,index2;
   CLONE *clp1,*clp2;
   int num_match=0,num_mismatch=0;
   char tempdirname[DIR_BUFFER_SIZE];
   char tempfilename[FIL_BUFFER_SIZE];

   if (!arrayExists(bands))
      if (fpRead() == -1) return;

   readfile=graphQueryOpen(tempdirname,tempfilename,"txt","r","Enter file name of clone pairs");
   if(readfile!=NULL){
      fprintf(stdout,"Starting comparisons...\n");
      while(!feof(readfile)){
         if(fscanf(readfile,"%37s %37s",clone_name1,clone_name2)<2){
            if(feof(readfile)) break;
            fprintf(stderr,"Error reading clone names.\n");
            fclose(readfile);
            return;
         }
         if(fppFind(acedata,clone_name1,&index1,(void *)cloneOrder))
            clp1=arrp(acedata,index1,CLONE);
         else{
            fprintf(stdout,"Clone %s not found.  Skipping pair.\n",clone_name1);
            continue;
         }
         if(fppFind(acedata,clone_name2,&index2,(void *)cloneOrder))
            clp2=arrp(acedata,index2,CLONE);
         else{
            fprintf(stdout,"Clone %s not found.  Skipping pair.\n",clone_name2);
            continue;
         }
         if(check_two_fp(clp1->fp,clp2->fp)){
            num_match++; 
            fprintf(stdout,"Match:    %14s %14s %12s %12s %2db %2db %2d %1.0e\n",
                    clone_name1,clone_name2,clp1->fp->gelname,clp2->fp->gelname,
                    C1z.nbands,C2z.nbands,Sz.match,Sz.prob);
         }
         else{
            num_mismatch++; 
            fprintf(stdout,"Mismatch: %14s %14s %12s %12s %2db %2db %2d %1.0e\n",
                    clone_name1,clone_name2,clp1->fp->gelname,clp2->fp->gelname,
                    C1z.nbands,C2z.nbands,Sz.match,Sz.prob);
         }
      }
      fprintf(stdout,"Total matches: %d    Total mismatches: %d\ndone.\n",
              num_match,num_mismatch);
   }
   else
      fprintf(stdout,"No comparisons done\n");
   fclose(readfile);
}
